Exploratory overview of ASCOT anticoagulation data.

Author

James Totterdell

Published

May 24, 2022

Code
library(tidyverse)
library(patchwork)
library(DT)
library(plotly)
library(knitr)
library(kableExtra)

# Make ggplot2's minimal theme (base font size 12) the default for every
# plot created after this point.
theme_set(theme_minimal(base_size = 12))

# Project-local helper script; presumably provides the baseline table
# generators called later in this report -- TODO confirm.
source("r/summary_functions.r")
Code
# Project-local script; presumably defines read_all_no_daily() and
# read_all_daily(), which are called immediately below -- TODO confirm.
source("r/derive_full_datasets.r")
# Dataset that the tables and figures below filter on (e.g. ENR_rec == 1).
all_data <- read_all_no_daily()
# Daily dataset; in the visible part of this report it is only passed to the
# data-completeness summary.
all_daily_data <- read_all_daily()

Dictionary

Code
# Display the data dictionary as an interactive table, restricted to the
# columns that describe each database field.
source("r/read_raw_data.r")
dict <- select(
  read_dictionary(),
  HeadingName, DbField, FieldTitle, FieldFormatting, ValidationNotes
)
datatable(dict)

Data Completeness

Code
# Build the completeness summary from the daily data, then show its
# `combined` element as an interactive table with right-aligned,
# non-wrapping cells in columns 2 to 10.
source("r/data_completeness.r")
completeness <- all_daily_data %>%
  generate_completeness_data() %>%
  summarise_completeness_data()
datatable(completeness$combined) %>%
  formatStyle(
    columns = 2:10,
    "text-align" = "right",
    "white-space" = "nowrap"
  )

Available Records

Code
# Tabulate every observed combination of the record-availability flags and
# render the counts as a striped HTML table.
count(all_data, EL_rec, ENR_rec, BAS_rec, DIS_rec, D28_rec, WTH_rec) %>%
  kable(format = "html") %>%
  kable_styling(bootstrap_options = "striped", font_size = 14)
Table 1: Patterns of record availability for all screened participants.
EL_rec ENR_rec BAS_rec DIS_rec D28_rec WTH_rec n
1 0 0 0 0 0 623
1 1 0 1 1 0 1
1 1 1 0 0 1 18
1 1 1 0 1 1 1
1 1 1 1 0 0 1
1 1 1 1 1 0 1572
1 1 1 1 1 1 8

Enrolment

Study enrolment
# md supplies the dates drawn as dashed reference lines (column meet_date);
# presumably interim-analysis meetings -- confirm against get_interim_dates().
md <- get_interim_dates()
# id supplies one row per intervention with Domain, stdate and endate.
id <- get_intervention_dates()

# Top panel: cumulative randomisations per calendar day, from the first
# randomisation date up to today. Days with no randomisation count as zero.
p1 <- all_data %>%
  filter(ENR_rec == 1) %>%
  count(RandDate) %>%
  complete(
    RandDate = seq.Date(
      from = min(RandDate, na.rm = TRUE),
      to = Sys.Date(),
      by = "1 day"
    ),
    fill = list(n = 0)
  ) %>%
  rename(`Randomisation date` = RandDate) %>%
  mutate(`Cumulative enrolments` = cumsum(n)) %>%
  ggplot(aes(x = `Randomisation date`, y = `Cumulative enrolments`)) +
  geom_step() +
  geom_vline(
    data = md,
    mapping = aes(xintercept = as.numeric(meet_date)),
    linetype = 2
  ) +
  labs(x = "")

# Bottom panel: one horizontal segment per intervention running from stdate
# to endate, with a cross marking endate, faceted by domain.
p2 <- id %>%
  mutate(Intervention = fct_inorder(val_labels(Intervention))) %>%
  ggplot() +
  geom_point(
    mapping = aes(x = Intervention, y = endate),
    shape = 4
  ) +
  geom_segment(
    mapping = aes(
      x = Intervention, xend = Intervention,
      y = stdate, yend = endate
    )
  ) +
  geom_hline(
    data = md,
    mapping = aes(yintercept = as.numeric(meet_date)),
    linetype = 2
  ) +
  facet_grid(Domain ~ ., drop = TRUE, scales = "free_y") +
  coord_flip() +
  labs(x = "", y = "Calendar date")

# Stack the two panels vertically (patchwork).
p1 / p2

Study enrolment by country
# Cumulative randomisations per calendar day within each country, from the
# first randomisation date up to today. Country/day combinations with no
# randomisation are filled with zero so every country spans the same dates.
p1 <- all_data %>%
  filter(ENR_rec == 1) %>%
  count(Country = PT_CountryName, RandDate) %>%
  complete(
    Country,
    RandDate = seq.Date(
      min(RandDate, na.rm = TRUE),
      Sys.Date(),
      by = "1 day"
    ),
    fill = list(n = 0)
  ) %>%
  group_by(Country) %>%
  mutate(`Cumulative enrolments` = cumsum(n)) %>%
  # Drop the grouping once the per-country running total has been computed.
  ungroup() %>%
  rename(`Randomisation date` = RandDate) %>%
  ggplot(aes(`Randomisation date`, `Cumulative enrolments`)) +
  facet_wrap(~ Country, ncol = 1, scales = "free_y") +
  geom_step() +
  # Dashed vertical lines at the dates held in md$meet_date.
  geom_vline(
    data = md,
    aes(xintercept = as.numeric(meet_date)),
    linetype = 2
  ) +
  labs(x = "")
# p2 is the intervention timeline built in the study-enrolment chunk.
(p1 / p2) + plot_layout(heights = c(5, 2))

Study enrolment by site
# Cumulative randomisations per calendar day within each site, spanning the
# first to the last observed randomisation date. Site/day combinations with
# no randomisation are filled with zero.
p1 <- all_data %>%
  filter(ENR_rec == 1) %>%
  count(Country = PT_CountryName, Site = PT_LocationName, RandDate) %>%
  complete(
    # nesting() keeps only the Country/Site pairs that occur in the data.
    nesting(Country, Site),
    # na.rm = TRUE matches the other enrolment figures and stops a missing
    # randomisation date from making seq.Date() fail.
    RandDate = seq.Date(
      min(RandDate, na.rm = TRUE),
      max(RandDate, na.rm = TRUE),
      by = "1 day"
    ),
    fill = list(n = 0)
  ) %>%
  group_by(Country, Site) %>%
  mutate(`Cumulative enrolments` = cumsum(n)) %>%
  # Drop the grouping once the per-site running total has been computed.
  ungroup() %>%
  rename(`Calendar date` = RandDate) %>%
  ggplot(aes(`Calendar date`, `Cumulative enrolments`)) +
  facet_wrap(
    ~ paste(Country, Site, sep = ": "),
    ncol = 5, scales = "free_y"
  ) +
  geom_step() +
  # Dashed vertical lines at the dates held in md$meet_date.
  geom_vline(
    data = md,
    aes(xintercept = as.numeric(meet_date)),
    linetype = 2
  ) +
  # Whole-number axis breaks only, since the y axis is a count.
  scale_y_continuous(
    breaks = function(x) {
      unique(floor(pretty(seq(0, (max(x) + 1) * 1.1))))
    }
  )
ggplotly(p1, height = 1000)

Intervention Assignments

Code
# Count participants for each combination of the FAS_ITT and ACS_ITT flags
# and the anticoagulation assignment, rendered as a striped table.
count(all_data, FAS_ITT, ACS_ITT, CAssignment) %>%
  kable() %>%
  kable_styling(bootstrap_options = "striped", font_size = 14)
Table 2: Counts of intervention assignments to anticoagulation
FAS_ITT ACS_ITT CAssignment n
1 0 C0 26
1 1 C1 620
1 1 C2 620
1 1 C3 285
1 1 C4 50
1 NA NA 623
Code
# Cross-tabulate the FAS_ITT, ACS_ITT and AVS_ITT flags against the
# AAssignment and CAssignment values, rendered as a striped table.
count(all_data, FAS_ITT, ACS_ITT, AVS_ITT, AAssignment, CAssignment) %>%
  kable() %>%
  kable_styling(bootstrap_options = "striped", font_size = 14)
Table 3: Counts of intervention assignments to regimen
FAS_ITT ACS_ITT AVS_ITT AAssignment CAssignment n
1 0 1 A1 C0 15
1 0 1 A2 C0 11
1 1 0 A0 C1 568
1 1 0 A0 C2 566
1 1 0 A0 C3 278
1 1 0 A0 C4 35
1 1 1 A1 C1 27
1 1 1 A1 C2 19
1 1 1 A1 C3 6
1 1 1 A1 C4 4
1 1 1 A2 C1 25
1 1 1 A2 C2 35
1 1 1 A2 C3 1
1 1 1 A2 C4 11
1 NA NA NA NA 623
Intervention allocations over time
# Cumulative allocations to each anticoagulation intervention per calendar
# day, from the first randomisation date up to today. Intervention/day
# combinations with no allocation are filled with zero.
p1 <- all_data %>%
  filter(ENR_rec == 1) %>%
  count(Intervention = CAssignment, RandDate) %>%
  complete(
    Intervention,
    RandDate = seq.Date(
      min(RandDate, na.rm = TRUE),
      # Sys.Date() is a single non-missing date, so no max() is needed.
      Sys.Date(),
      by = "1 day"
    ),
    fill = list(n = 0)
  ) %>%
  group_by(Intervention) %>%
  mutate(cn = cumsum(n)) %>%
  ungroup() %>%
  ggplot(aes(RandDate, cn)) +
  geom_step(aes(colour = Intervention)) +
  scale_color_viridis_d(option = "D") +
  labs(x = "", y = "Cumulative allocation")
# p2 is the intervention timeline built in the study-enrolment chunk.
p1 / p2

Country

Allocations to interventions by country of enrolment.

Allocations by country
# Allocation counts and within-country proportions for each anticoagulation
# intervention, one row per country and one column per intervention.
all_data %>%
  filter(ENR_rec == 1) %>%
  count(Country = PT_CountryName, Intervention = CAssignment) %>%
  group_by(Country) %>%
  # Format each cell as "n (proportion)", e.g. "24 (0.16)".
  mutate(lab = sprintf("%i (%.2f)", n, n / sum(n))) %>%
  ungroup() %>%
  select(-n) %>%
  # pivot_wider() replaces the superseded spread(). names_sort = TRUE keeps
  # the intervention columns sorted, as spread() did, instead of ordering
  # them by first appearance.
  pivot_wider(
    names_from = Intervention,
    values_from = lab,
    values_fill = "0 (0.00)",
    names_sort = TRUE
  ) %>%
  kable(align = "lrrrrr") %>%
  kable_styling(bootstrap_options = "striped", font_size = 12)
Table 4: Allocation to anti-coagulation interventions by country
Country C0 C1 C2 C3 C4
Australia 24 (0.16) 52 (0.34) 60 (0.39) 7 (0.05) 11 (0.07)
India 0 (0.00) 499 (0.38) 520 (0.40) 277 (0.21) 4 (0.00)
Nepal 0 (0.00) 57 (0.47) 32 (0.27) 0 (0.00) 31 (0.26)
New Zealand 2 (0.07) 12 (0.44) 8 (0.30) 1 (0.04) 4 (0.15)

Site

Allocations to interventions by site of enrolment.

Allocations by site
# Bar chart of allocation counts per anticoagulation intervention, with one
# facet per "Country: Site" label, rendered interactively.
p <- all_data %>%
  filter(ENR_rec == 1) %>%
  count(Country = Country, Site = Location, Intervention = CAssignment) %>%
  group_by(Site = paste(Country, Site, sep = ": ")) %>%
  # Within-site proportion; computed here but not mapped to an aesthetic.
  mutate(p = n / sum(n)) %>%
  ggplot(aes(x = Intervention, y = n)) +
  geom_col() +
  facet_wrap(~ Site, scales = "free_y") +
  scale_y_continuous(
    name = "Frequency",
    # Whole-number axis breaks only, since the y axis is a count.
    breaks = function(x) {
      upper <- (max(x) + 1) * 1.1
      unique(floor(pretty(seq(0, upper))))
    }
  )
ggplotly(p)

Baseline Factors

The following provides an overview of the baseline covariates collected for individuals.

There is an issue with the serum creatinine units for some participants. Some values have been reported as “umol/L” when it is more likely that they are in mg/dL: there is a spike of values that are reported as umol/L but are less than 1, whereas 1 mg/dL = 88.42 umol/L.

Code
# Write the records whose creatinine is labelled "umol/L" but has a value of
# at most 1.5 to a CSV in the "derived" folder under ASCOT_DATA (defined
# outside this chunk).
all_data %>%
  filter(EL_SerumCreatinineUnits == "umol/L", EL_SerumCreatinineBlood <= 1.5) %>%
  select(EligibilityCode, StudyPatientID, starts_with("EL_Serum")) %>%
  write_csv(file.path(ASCOT_DATA, "derived", "low_creatinine_values.csv"))

# Histogram of EL_SerumCreatinine_umolL, one panel per reported unit, with
# the x axis limited to 0-500.
all_data %>%
  filter(!is.na(EL_SerumCreatinineUnits)) %>%
  ggplot(aes(x = EL_SerumCreatinine_umolL)) +
  facet_wrap(~ EL_SerumCreatinineUnits) +
  geom_histogram(bins = 50, boundary = 0) +
  xlim(0, 500)

Demographics

Baseline demographics are summarised by intervention in the following table.

Demographics table
# Enrolled participants whose anticoagulation assignment is not "C0".
sdat <- filter(all_data, ENR_rec == 1, CAssignment != "C0")
generate_baseline_demographics_table(sdat, format = "html")
Table 5: Baseline demographics for participants randomised into the anticoagulation domain.
Anticoagulation
Variable C1
(n = 620)
C2
(n = 620)
C3
(n = 285)
C4
(n = 50)
Overall
(n = 1575)
Age (years), Median (IQR) 49 (37, 60) 48 (37, 61) 51 (38, 62) 58 (46, 69) 49 (37, 61)
Sex
Male, n (%) 361 (58) 392 (63) 158 (55) 25 (50) 936 (59)
Female, n (%) 259 (42) 228 (37) 127 (45) 25 (50) 639 (41)
Weight (kg)
Median (IQR) 68 (62, 76) 70 (62, 77) 68 (62, 76) 68 (57, 80) 69 (62, 76)
Missing, n (%) 0 (0) 0 (0) 0 (0) 0 (0) 0 (0)
Vaccinated1
Yes, n (%) 194 (31) 222 (36) 42 (15) 27 (54) 485 (31)
Missing, n (%) 32 (5) 22 (4) 29 (10) 0 (0) 83 (5)
Ethnicity
Indian, n (%) 500 (81) 522 (84) 277 (97) 4 (8) 1303 (83)
Asian, n (%) 20 (3) 12 (2) 1 (0) 10 (20) 43 (3)
European, n (%) 23 (4) 18 (3) 4 (1) 4 (8) 49 (3)
Maori, n (%) 3 (0) 4 (1) 0 (0) 3 (6) 10 (1)
Pacific Islander, n (%) 13 (2) 13 (2) 2 (1) 3 (6) 31 (2)
African, n (%) 1 (0) 0 (0) 1 (0) 0 (0) 2 (0)
Aboriginal, n (%) 0 (0) 1 (0) 0 (0) 1 (2) 2 (0)
Latin American, n (%) 0 (0) 1 (0) 0 (0) 0 (0) 1 (0)
Middle Eastern, n (%) 12 (2) 12 (2) 0 (0) 0 (0) 24 (2)
Other, n (%) 46 (7) 29 (5) 0 (0) 23 (46) 98 (6)
Unknown, n (%) 8 (1) 9 (1) 0 (0) 3 (6) 20 (1)
Smoking
Current, n (%) 17 (3) 22 (4) 3 (1) 5 (10) 47 (3)
Former, n (%) 76 (12) 53 (9) 15 (5) 14 (28) 158 (10)
Never, n (%) 527 (85) 545 (88) 267 (94) 31 (62) 1370 (87)
Missing, n (%) 0 (0) 0 (0) 0 (0) 0 (0) 0 (0)
1 Site LUD does not have ethics approval for collection of vaccination status and accounts for most missingness

Co-morbidities

Baseline co-morbidities are summarised by anti-coagulation intervention in the following table.

Co-morbidities table
# Enrolled participants whose anticoagulation assignment is not "C0".
sdat <- filter(all_data, ENR_rec == 1, CAssignment != "C0")
generate_baseline_comorbidities_table(sdat, format = "html")
Table 6: Baseline co-morbidities for participants randomised into the anticoagulation domain.
Anticoagulation
Comorbidity C1
(n = 620)
C2
(n = 620)
C3
(n = 285)
C4
(n = 50)
Overall
(n = 1575)
None, n (%) 367 (59.2) 381 (61.5) 166 (58.2) 19 (38.0) 933 (59.2)
Hypertension, n (%) 152 (24.5) 142 (22.9) 70 (24.6) 14 (28.0) 378 (24.0)
Diabetes, n (%) 145 (23.4) 141 (22.7) 79 (27.7) 11 (22.0) 376 (23.9)
Obesity, n (%) 25 (4.0) 24 (3.9) 3 (1.1) 5 (10.0) 57 (3.6)
Asthma, n (%) 19 (3.1) 16 (2.6) 6 (2.1) 4 (8.0) 45 (2.9)
Chronic lung disease, n (%) 16 (2.6) 13 (2.1) 1 (0.4) 7 (14.0) 37 (2.3)
Chronic cardiac disease, n (%) 11 (1.8) 15 (2.4) 1 (0.4) 2 (4.0) 29 (1.8)
Obstructive sleep apnoea, n (%) 3 (0.5) 3 (0.5) 2 (0.7) 0 (0.0) 8 (0.5)
Iatrogenic immunosuppression, n (%) 1 (0.2) 6 (1.0) 0 (0.0) 0 (0.0) 7 (0.4)
Chronic kidney disease, n (%) 0 (0.0) 5 (0.8) 1 (0.4) 0 (0.0) 6 (0.4)
Malignant neoplasm, n (%) 1 (0.2) 2 (0.3) 0 (0.0) 1 (2.0) 4 (0.3)
Moderate or severe liver disease, n (%) 2 (0.3) 1 (0.2) 0 (0.0) 0 (0.0) 3 (0.2)
Dialysis, n (%) 0 (0.0) 1 (0.2) 0 (0.0) 0 (0.0) 1 (0.1)
HIV infection, n (%) 1 (0.2) 0 (0.0) 0 (0.0) 0 (0.0) 1 (0.1)
Dementia, n (%) 0 (0.0) 0 (0.0) 0 (0.0) 0 (0.0) 0 (0.0)
Missing, n (%) 0 (0.0) 0 (0.0) 0 (0.0) 0 (0.0) 0 (0.0)

Prognostics

Baseline prognostics are summarised by anti-coagulation intervention in the following table.

Prognostics table
# Enrolled participants whose anticoagulation assignment is not "C0".
sdat <- filter(all_data, ENR_rec == 1, CAssignment != "C0")
generate_baseline_prognostics_table(sdat, format = "html")
Table 7: Baseline prognostic variables for participants randomised into the anticoagulation domain.
Anticoagulation
Variable C1
(n = 620)
C2
(n = 620)
C3
(n = 285)
C4
(n = 50)
Overall
(n = 1575)
Was the patient on room air for any of the preceding 24 hours?
Yes, n (%) 465 (75) 466 (75) 224 (79) 39 (78) 1194 (76)
Missing, n (%) 0 (0) 0 (0) 0 (0) 0 (0) 0 (0)
Was the patient's GCS < 15?
Yes, n (%) 63 (10) 65 (10) 6 (2) 2 (4) 136 (9)
Missing, n (%) 125 (20) 136 (22) 60 (21) 0 (0) 321 (20)
Peripheral oxygen saturation (SpO2) on room air (Lowest)
Median (IQR) 95 (94, 97) 96 (94, 97) 96 (94, 97) 94 (92, 96) 96 (94, 97)
Missing, n (%) 154 (25) 151 (24) 56 (20) 10 (20) 371 (24)
Highest respiratory rate (breaths/minute)
Median (IQR) 22 (21, 26) 22 (21, 26) 22 (20, 26) 22 (20, 24) 22 (21, 26)
Missing, n (%) 0 (0) 1 (0) 0 (0) 0 (0) 1 (0)
Highest recorded Urea in the last 24 hours (mmol/L)
Median (IQR) 4 (3, 5) 5 (4, 6) 4 (3, 6) 4 (3, 6) 4 (3, 6)
Missing, n (%) 33 (5) 35 (6) 17 (6) 1 (2) 86 (5)
Highest recorded CRP in the last 24 hours (mg/L)
Median (IQR) 71 (37, 190) 75 (38, 220) 77 (44, 223) 68 (33, 129) 73 (38, 200)
Missing, n (%) 79 (13) 62 (10) 20 (7) 29 (58) 190 (12)
APTT1
Median (IQR) 33 (29, 36) 33 (30, 36) 32 (28, 37) 33 (28, 38) 33 (29, 36)
Missing, n (%) 437 (70) 445 (72) 197 (69) 35 (70) 1114 (71)
INR1
Mean (SD) 1.19 (0.39) 1.23 (0.58) 1.32 (1.34) 1.12 (0.18) 1.23 (0.72)
Missing, n (%) 109 (18) 109 (18) 49 (17) 7 (14) 274 (17)
Fibrinogen1 (g/L)
Mean (SD) 5.27 (1.95) 5.20 (1.59) 4.78 (1.40) 6.49 (1.52) 5.18 (1.69)
Missing, n (%) 570 (92) 561 (90) 244 (86) 42 (84) 1417 (90)
Prothrombin time1 (sec)
Median (IQR) 14 (13, 17) 14 (13, 17) 15 (13, 16) 13 (12, 14) 14 (13, 16)
Missing, n (%) 199 (32) 208 (34) 118 (41) 10 (20) 535 (34)
Taking aspirin
Yes, n (%) 20 (3) 26 (4) 2 (1) 3 (6) 51 (3)
Missing, n (%) 0 (0) 0 (0) 0 (0) 0 (0) 0 (0)
1 Only at least one of APTT, INR, fibrinogen, and prothrombin time is required.

Age

Histogram of age
# Age distribution of enrolled participants: one bin for 18-19, then 5-year
# bins from 20 to 100, each closed on the left.
all_data %>%
  filter(ENR_rec == 1) %>%
  ggplot(aes(x = AgeAtEntry)) +
  geom_histogram(
    breaks = c(18, seq(from = 20, to = 100, by = 5)),
    closed = "left",
    colour = "white"
  ) +
  scale_x_continuous(breaks = seq(from = 20, to = 95, by = 5)) +
  labs(
    x = "Age at randomisation (5-year bins, 30 to 34, 35 to 39, etc.)",
    y = "Count"
  )

Histogram of age by anti-coagulation intervention
# Age distribution of enrolled participants, one panel per anticoagulation
# assignment: one bin for 18-19, then 5-year bins from 20 to 100, each
# closed on the left. Rendered as an interactive plotly figure.
p <- all_data %>%
  filter(ENR_rec == 1) %>%
  ggplot(aes(x = AgeAtEntry)) +
  geom_histogram(
    breaks = c(18, seq(from = 20, to = 100, by = 5)),
    closed = "left",
    colour = "white"
  ) +
  facet_wrap(~ CAssignment, scales = "free_y", ncol = 5) +
  scale_x_continuous(breaks = seq(from = 20, to = 95, by = 5)) +
  labs(
    x = "Age at randomisation (5-year bins, 30 to 34, 35 to 39, etc.)",
    y = "Count"
  )
ggplotly(p, width = 1700, height = 400)

Figure 7: Distribution of age by anti-coagulation intervention